Final Project - John Williams

Load libraries

library(tidyverse)
library(raster)          #raster()
library(sf)              #st_read()
library(ggspatial)       #annotation_scale,annotation_north_arrow
library(ggnewscale)      #new_scale_color() 
#library(ggsn)            #scalebar()
library(plotly)          #plot_ly()
library(gridExtra)       #grid.arrange()

Set working directory

setwd("~/GitHub/DSCI605/Mod16")

Read the files

##################Read csv and shape file into R
# learners will have this data loaded from an earlier episode, st_read needs sf packages
Unemployrate <-read_csv("data/unemployment_county.csv")
## Rows: 37674 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): County, State
## dbl (2): Unemployment Rate, Year
## num (3): Labor Force, Employed, Unemployed
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Crimerate<-read_csv("data/crime_and_incarceration_by_state.csv")
## Rows: 816 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): jurisdiction
## dbl (13): year, prisoner_count, state_population, violent_crime_total, murde...
## lgl  (3): includes_jails, crime_reporting_change, crimes_estimated
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
States<-st_read("data/tl_2019_us_state/tl_2019_us_state.shp")
## Reading layer `tl_2019_us_state' from data source 
##   `C:\Users\jowilli1\Documents\GitHub\DSCI605\Mod16\data\tl_2019_us_state\tl_2019_us_state.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 56 features and 14 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## Geodetic CRS:  NAD83

Process US data

######## Focus on Contiguous USA
Contiguous_state <- States%>% filter(STUSPS!="AK"& 
                                       STUSPS!="AS"& 
                                       STUSPS!="MP"& 
                                       STUSPS!="PR"& 
                                       STUSPS!="VI"& 
                                       STUSPS!="HI"& 
                                       STUSPS!="GU")

###Check the length of states.
length(unique(Contiguous_state$STUSPS))
## [1] 49

Process Unemployment data

##############Unemployrate 

##Check the length of State
unique(Unemployrate $State)
##  [1] "AL" "AK" "AZ" "AR" "CA" "CO" "CT" "DE" "FL" "GA" "HI" "ID" "IL" "IN" "IA"
## [16] "KS" "KY" "LA" "ME" "MD" "MA" "MI" "MN" "MS" "MO" "MT" "NE" "NV" "NH" "NJ"
## [31] "NM" "NY" "NC" "ND" "OH" "OK" "OR" "PA" "RI" "SC" "SD" "TN" "TX" "UT" "VT"
## [46] "VA" "WA" "WV" "WI" "WY"
length(unique(Unemployrate $State))
## [1] 50
Unemployrate <-  Unemployrate %>% filter(State!="AK"& State!="HI") %>%
  group_by(State,Year) %>% 
  summarise(Totalforce=sum(`Labor Force`),
            Totalemployed=sum(Employed),
            Totalunemployed=sum(Unemployed),
            Meanrate=mean(`Unemployment Rate`,rm.na=TRUE)
            #Meanrate = (Totalunemployed/Totalforce)*100
  )
## `summarise()` has grouped output by 'State'. You can override using the
## `.groups` argument.
##Check the length of State again  
length(unique(Unemployrate $State))
## [1] 48
##Change the column name “State” into  "STUSPS" by using rename().Use   filter() to pick some years data from 2007 to 2014.
Unemployrate <- Unemployrate %>% 
  rename("STUSPS"="State") %>% 
  filter(Year %in% c(2007:2014))

Process crime data

##############Crimerate

##Check the length of states
unique(Crimerate$jurisdiction)
##  [1] "FEDERAL"        "ALABAMA"        "ALASKA"         "ARIZONA"       
##  [5] "ARKANSAS"       "CALIFORNIA"     "COLORADO"       "CONNECTICUT"   
##  [9] "DELAWARE"       "FLORIDA"        "GEORGIA"        "HAWAII"        
## [13] "IDAHO"          "ILLINOIS"       "INDIANA"        "IOWA"          
## [17] "KANSAS"         "KENTUCKY"       "LOUISIANA"      "MAINE"         
## [21] "MARYLAND"       "MASSACHUSETTS"  "MICHIGAN"       "MINNESOTA"     
## [25] "MISSISSIPPI"    "MISSOURI"       "MONTANA"        "NEBRASKA"      
## [29] "NEVADA"         "NEW HAMPSHIRE"  "NEW JERSEY"     "NEW MEXICO"    
## [33] "NEW YORK"       "NORTH CAROLINA" "NORTH DAKOTA"   "OHIO"          
## [37] "OKLAHOMA"       "OREGON"         "PENNSYLVANIA"   "RHODE ISLAND"  
## [41] "SOUTH CAROLINA" "SOUTH DAKOTA"   "TENNESSEE"      "TEXAS"         
## [45] "UTAH"           "VERMONT"        "VIRGINIA"       "WASHINGTON"    
## [49] "WEST VIRGINIA"  "WISCONSIN"      "WYOMING"
length(unique(Crimerate$jurisdiction))
## [1] 51
head(Crimerate)
## # A tibble: 6 × 17
##   jurisdiction includes_jails  year prisoner_count crime_reporting_change
##   <chr>        <lgl>          <dbl>          <dbl> <lgl>                 
## 1 FEDERAL      FALSE           2001         149852 NA                    
## 2 ALABAMA      FALSE           2001          24741 FALSE                 
## 3 ALASKA       TRUE            2001           4570 FALSE                 
## 4 ARIZONA      FALSE           2001          27710 FALSE                 
## 5 ARKANSAS     FALSE           2001          11489 FALSE                 
## 6 CALIFORNIA   FALSE           2001         157142 FALSE                 
## # ℹ 12 more variables: crimes_estimated <lgl>, state_population <dbl>,
## #   violent_crime_total <dbl>, murder_manslaughter <dbl>, rape_legacy <dbl>,
## #   rape_revised <dbl>, robbery <dbl>, agg_assault <dbl>,
## #   property_crime_total <dbl>, burglary <dbl>, larceny <dbl>,
## #   vehicle_theft <dbl>
###Change the column name 
Crimerate <-  Crimerate %>% 
  rename("STUSPS"="jurisdiction") %>% 
  rename("Year"="year") %>% 
  filter(STUSPS!="FEDERAL"& STUSPS!="ALASKA"& STUSPS!="HAWAII") %>% 
  filter(Year %in% c(2007:2014))

##Recheck the data
length(unique(Crimerate$STUSPS))
## [1] 48
head(Crimerate)
## # A tibble: 6 × 17
##   STUSPS      includes_jails  Year prisoner_count crime_reporting_change
##   <chr>       <lgl>          <dbl>          <dbl> <lgl>                 
## 1 ALABAMA     FALSE           2007          25253 FALSE                 
## 2 ARIZONA     FALSE           2007          37700 FALSE                 
## 3 ARKANSAS    FALSE           2007          13275 FALSE                 
## 4 CALIFORNIA  FALSE           2007         171444 FALSE                 
## 5 COLORADO    FALSE           2007          22666 FALSE                 
## 6 CONNECTICUT TRUE            2007          19438 FALSE                 
## # ℹ 12 more variables: crimes_estimated <lgl>, state_population <dbl>,
## #   violent_crime_total <dbl>, murder_manslaughter <dbl>, rape_legacy <dbl>,
## #   rape_revised <dbl>, robbery <dbl>, agg_assault <dbl>,
## #   property_crime_total <dbl>, burglary <dbl>, larceny <dbl>,
## #   vehicle_theft <dbl>
###Changes the state names in the state column "STUSPS"
Crimerate$STUSPS <- state.abb[match(str_to_title(Crimerate$STUSPS),state.name)]
###Calculate the crimerate
Crimerate <- Crimerate %>% 
  mutate(Crimerate=(violent_crime_total/state_population)*100) %>% 
  dplyr::mutate_if(is.numeric, round, 1)

Save the data frames

save(list = c("Crimerate", "Unemployrate","Contiguous_state"), file = "CleaneData.Rdata")

Read the data

##################Read data file into R
load(file = "CleaneData.Rdata")

Join Tables

########Join the rational tables and check the missing values
CS_Erate<-right_join(Contiguous_state, Unemployrate, 
                     by = c("STUSPS"))

CS_Erate_Crate <- right_join(CS_Erate, Crimerate, 
                             by = c("STUSPS","Year"))
CS_Erate_Crate1 <- CS_Erate_Crate %>% 
  dplyr::select(REGION,STUSPS,NAME,Year,Meanrate,Crimerate) %>% 
  dplyr::rename("Unemplyrate"="Meanrate")

#############Check the missing values

which(is.na(CS_Erate_Crate1$REGION))
## integer(0)

Create Plots

Prep for plots

## Perform the filter for the Year, preserving geometry only from CS_Erate_Crate1
CS_Erate_Crate2 <- CS_Erate_Crate1 %>%
  filter(Year == 2014)

Plot 1

# Plot 1 The unemployment spatial map in 2014 over contiguous USA
CS_Erate_Crate2 %>%   # Provide data to ggplot
  ggplot() +          # Start the graph
  geom_sf(aes(fill = Unemplyrate), color = "white", size = 0.3) +   # Plot the US map
  scale_fill_gradientn(colors = c("darkblue", "lightblue"), name = "Year2014-Unemployment Rate",
                       limits = c(3, 10)) +    # Set up the legend
  #scale_color_manual(values = c("B"="blue"), labels = c("Area of Interest"), name="") +
  labs(title = "Unemployment Rate Map Over Contiguous US") +   # Plot title
  #scale bar
  annotation_scale(location = "bl", width_hint=0.5) +   #Alternative scale bar for distance
  #scalebar(data=IN, location = "bottomleft", anchor = c(x=-88.5, y=37), dist = 150,
           #dist_unit="km", transform=TRUE, model="WGS84", st.dist=0.04)+
  ## Add North Arrow
  annotation_north_arrow(location="br", which_north="true",
                         style = north_arrow_fancy_orienteering) +
  theme(legend.position="right",       # Put legend on the right
        plot.title = element_text(hjust = 0.5,       # Tweak plot title
                                  color = "Gray40",
                                  size = 16,
                                  face = "bold")) +
  xlab("Longitude") + ylab("Latitude")    # Label x and y axis

Plot 2

#Plot 2 The crimerate spatial map in 2014 over contiguous USA
CS_Erate_Crate2 %>%        # Provide data to ggplot
  ggplot() +              # Start the graph
  geom_sf(aes(fill = Crimerate), color = "white", size = 0.3) +     # Plot the US map
  scale_fill_gradientn(colors = c("darkblue", "lightblue"), name = "Year2014-Crime Rate",
                       limits = c(.1, .6)) +        # Set up the legend
  #scale_color_manual(values = c("B"="blue"), labels = c("Area of Interest"), name="") +
  labs(title = "Crime Rate Map Over Contiguous US") +     # Set up the title
  #scale bar
  annotation_scale(location = "bl", width_hint=0.5) +     #Alternative scale bar for distance
  #scalebar(data=IN, location = "bottomleft", anchor = c(x=-88.5, y=37), dist = 150,
  #dist_unit="km", transform=TRUE, model="WGS84", st.dist=0.04)+
  ## Add North Arrow
  annotation_north_arrow(location="br", which_north="true",
                         style = north_arrow_fancy_orienteering) +
  theme(legend.position="right",       #change 'top' to specific position by c(2.5,0.8) Position legend to the right
        plot.title = element_text(hjust = 0.5,         # Tweak plot title
                                  color = "Gray40",
                                  size = 16,
                                  face = "bold")) +
  xlab("Longitude") + ylab("Latitude")                 # Label x and y axis

Plot 3

# Plot 3 Region 1 Unemployment
Region1 <- CS_Erate_Crate2 %>%
  filter(REGION == 1, Year == 2014)

Region1 %>%
  ggplot() +
  geom_sf(aes(fill = Unemplyrate), color = "white", size = 0.3) +
  scale_fill_gradientn(colors = c("darkblue", "lightblue"), name = "Region 1 - Unemployment Rate") +
  #scale_color_manual(values = c("B"="blue"), labels = c("Area of Interest"), name="") +
  labs(title = "Region 1 Unemployment Rate Map") +
  #scale bar
  annotation_scale(location = "bl", width_hint=0.5) +
  #scalebar(data=IN, location = "bottomleft", anchor = c(x=-88.5, y=37), dist = 150,
  #dist_unit="km", transform=TRUE, model="WGS84", st.dist=0.04)+
  ## Add North Arrow
  annotation_north_arrow(location="br", which_north="true",
                         # pad_x = unit(2.5, "in"), pad_y = unit(0.2, "in"),
                         style = north_arrow_fancy_orienteering) +
  theme(legend.position="right",       #change 'top' to specific position by c(2.5,0.8)
        plot.title = element_text(hjust = 0.5,
                                  color = "Gray40",
                                  size = 16,
                                  face = "bold"),
        plot.subtitle = element_text(color="blue"),
        plot.caption = element_text(color="Gray60")) +
  xlab("Longitude") + ylab("Latitude")

Plot 4

#Plot 4 Crime Rate Region 2
Region2 <- CS_Erate_Crate2 %>%
  filter(REGION == 2, Year == 2014)

Region2 %>%
  ggplot() +
  geom_sf(aes(fill = Crimerate), color = "white", size = 0.3) +
  scale_fill_gradientn(colors = c("darkblue", "lightblue"), name = "Region 2 - Crime Rate") +
  #scale_color_manual(values = c("B"="blue"), labels = c("Area of Interest"), name="") +
  labs(title = "Region 2 Crime Rate Map") +
  #scale bar
  annotation_scale(location = "bl", width_hint=0.5) +
  #scalebar(data=IN, location = "bottomleft", anchor = c(x=-88.5, y=37), dist = 150,
  #dist_unit="km", transform=TRUE, model="WGS84", st.dist=0.04)+
  ## Add North Arrow
  annotation_north_arrow(location="br", which_north="true",
                         # pad_x = unit(2.5, "in"), pad_y = unit(0.2, "in"),
                         style = north_arrow_fancy_orienteering) +
  theme(legend.position="right",       #change 'top' to specific position by c(2.5,0.8)
        plot.title = element_text(hjust = 0.5,
                                  color = "Gray40",
                                  size = 16,
                                  face = "bold"),
        plot.subtitle = element_text(color="blue"),
        plot.caption = element_text(color="Gray60")) +
  xlab("Longitude") + ylab("Latitude")

Plot 5

## Plot 5 Scatter Plot with colors by Region
# Create a plot with colors by Region
CS_Erate_Crate2$geometry <- NULL  #Drop geometry column for plotly

plot_ly(
  data = CS_Erate_Crate2,          # Use this data file 
  x = ~Crimerate,                  # Use Crimerate for the x axis
  y = ~Unemplyrate,                # Use Unemplyrate for the y axis
  type = 'scatter',                # Create an interactive scatter plot
  mode = 'markers',                # Create the points at the intersection of the two
  color = ~as.factor(REGION)       # Color the points by the region they are in
) %>%
  layout(
    title = "Unemployment Rate and Crime Rate in 2014",       # Title
    xaxis = list(title = "Crime Rate per 100 people"),        # x-axis label
    yaxis = list(title = "UnemploymentRates per 100 people")  # y-axis label
  )

Plot 6

# Plot 6 The time series visualization of Unemployment Rate for some states
#Select states
states_to_plot <- c("California", "Idaho", "Illinois", "Indiana")
CS_Erate_Crate1$geometry <- NULL      #Drop geometry column for plotly

# Filter data for the specified states and years
ts_data <- CS_Erate_Crate1 %>%
  filter(Year >= 2007, Year <= 2014, NAME %in% states_to_plot)

# Create the plotly Unemployment time series plot
plot_ly(data = ts_data,           # Use ts_data for plot
        x = ~Year,                # Use Year for time series x axis variable
        y = ~Unemplyrate,         # Use Unemployrate for y axis
        color = ~NAME,            # Color points by state name
        type = 'scatter',         # Set type to scatter for lines and markers
        mode = 'lines+markers',   # Use lines and markers
        text = ~paste("Year:", Year, "<br>Unemployment Rate:", Unemplyrate),
        hoverinfo = 'text'        # Show tooltip text
) %>%
  layout(
    title = "Unemployment Rate Changes Along with Years",  # Title
    xaxis = list(title = "Year", tickvals = 2007:2014),    # x-axis label and ticks
    yaxis = list(title = "Unemployment Rate", range = c(3, 14)),  # y-axis label and range
    legend = list(title = list(text = ""))  # Remove legend title
  )

Plot 7

# Plot 7 The time series visualization of Crimerate for some states
# Define states to plot
states_to_plot <- c("California", "Idaho", "Florida", "Indiana")

# Filter data for the specified states and years
ts1_data <- CS_Erate_Crate1 %>%
  filter(Year >= 2007, Year <= 2014, NAME %in% states_to_plot)

# Create the plotly time series plot
plot_ly(data = ts1_data,          # use ts1_data for plot
        x = ~Year,                # Use Year for time series x axis variable
        y = ~Crimerate,           # Use Crimerate variable for y axis
        color = ~NAME,            # Color points by state name
        type = 'scatter',         # Set type to scatter for lines and markers
        mode = 'lines+markers',   # Use lines and markers
        text = ~paste("Year:", Year, "<br>Crime Rate:", Crimerate),
        hoverinfo = 'text'        # Show tooltip text
) %>%
  layout(
    title = "Crime Rate Changes Along with Years",  # Title
    xaxis = list(title = "Year", tickvals = 2007:2014),    # x-axis label and ticks
    yaxis = list(title = "Crime Rate", range = c(.1, .8)),  # y-axis label and range
    legend = list(title = list(text = ""))  # Remove legend title
  )